#################################################
####       Replication Code for              ####
####  A Re-Assessment of Reporting Bias in   ####
#### Event-Based Violence Data with Respect  ####
####         to Cell Phone Coverage          ####
#################################################
####                                         ####  
####                1/18/2017                ####         
####      (3) Generate Results from          #### 
####            Simulation                   ####
#################################################


library(haven)
library(foreign)
library(ggplot2)
library(plyr)
library(doParallel)
library(plm)
library(lmtest)
library(sandwich)

###set your working directory if not already done so

##########################################
##### Results for Figure 2
##########################################

load("dataForReplication_cell.rda")

length(new$conf2008_dum[new$cell_dum_07==0 & new$conf2008_dum==1]) #182
length(new$conf2008_dum[new$cell_dum_07==1 & new$conf2008_dum==1]) #176
length(new$conf2008_dum[new$conf2008_dum==1]) #358

seeds = seq(0,1000,length.out= 1000)

share_added = rep(NA,6)
# 5% is 10
share_added[1] = round(182/0.95 - 182,0)
# 10% is 20
share_added[2] = round(182/0.90 - 182,0)
# 15% is 32
share_added[3] = round(182/0.85 - 182,0)
# 20% is 46
share_added[4] = round(182/0.8 - 182,0)
# 25% is 61
share_added[5] = round(182/0.75 - 182,0)
# 30% is 78
share_added[6] = round(182/0.7 - 182,0)



#####
data = new[,c("cow", "gid","conf2008_dum","cell_dum_07","pre2000_count","bdist1","capdist","pop2005","mnt","irri","gcppc00")]

total_event_nonCell = sum(new$conf2008_dum[new$cell_dum_07 == 0])

simulation = function(prob){
  simuTotal = round(total_event_nonCell/prob - total_event_nonCell,0)
  data_new = data
  addGids = sample(new$gid[new$cell_dum_07 == 0], simuTotal, replace = TRUE)
  data_new$conf_dum_sim = data_new$conf2008_dum
  data_new$conf_dum_sim[data_new$gid %in% addGids] = 1
  data_new = data_new[,-which(names(data_new)%in%c("gid","conf2008_dum"))]
  data_new = data_new[complete.cases(data_new),]
  m6 = lm(conf_dum_sim ~ pre2000_count + bdist1+capdist+pop2005+mnt+irri+gcppc00+cell_dum_07+factor(cow),data=data_new)
  res = c(prob,summary(m6)$coef[9,1], summary(m6)$coef[9,2])
  names(res) = c("prop","coef","se")
  return(res)
}


percent_reported = c(0.95,0.9,0.85,0.8,0.75,0.7,0.65)
sim_run = rep(percent_reported,1000)
seeds = seq(1,7000,1)

sim_dat = data.frame(sim_run, seeds)
sim_dat = sim_dat[order(sim_run),] 

cl = makeCluster(4)
registerDoParallel(cl)

simu_out_CS_completeRandom = foreach(i = 1:7000,
                             .export = c("sim_dat","simulation"),
                             .combine = rbind
) %dopar% {
  set.seed(sim_dat[i,2])
  result = simulation(sim_dat[i,1])
  return(result)
}
save(simu_out_CS_completeRandom, file = "simu_out_CS_completeRandom.rda")






###### Generate results for Figure 8


data_full = read_dta("panel.dta")
#names(data_full)

data_full$year = NA 

data_full$year[data_full$dum08==1] = 2008
data_full$year[data_full$dum09==1] = 2009
data_full$year[is.na(data_full$year)] = 2007

dat = data_full[,c("gid","conf_dum","year","cell","pre2000_count","bdist1","capdist","pop2005","mnt","irri","gcppc00")]
rm(data_full)

seeds = seq(1,7000,1)

#iter = 1

#names(data)

#summary(data$conf_dum)

# 529 in cell without coverage 

share_added = rep(NA,7)
# 5% is 28
share_added[1] = round(529/0.95 - 529,0)
# 10% is 59
share_added[2] = round(529/0.90 - 529,0)
# 15% is 94
share_added[3] = round(529/0.85 - 529,0)
# 20% is 133
share_added[4] = round(529/0.8 - 529,0)
# 25% is 177
share_added[5] = round(529/0.75 - 529,0)
# 30% is 227
share_added[6] = round(529/0.7 - 529,0)
#35% is 285
share_added[7] = round(529/0.65 - 529,0)

fake_data_analysis = function(percent,iteration){
  data = dat[,c("gid","year","conf_dum","cell")]
  data_forPred = dat
  set.seed(seeds[iteration])
  dataComplete = data[complete.cases(data_forPred),]
  data_forPred = data_forPred[complete.cases(data_forPred),]
  m07 = lm(conf_dum ~ pre2000_count + bdist1+capdist+pop2005+mnt+irri+gcppc00,data=data_forPred[data_forPred$year==2007,])
  dataComplete$pred = 0
  dataComplete$pred[dataComplete$year == 2007] = predict(m07)
  m08 = lm(conf_dum ~ pre2000_count + bdist1+capdist+pop2005+mnt+irri+gcppc00,data=data_forPred[data_forPred$year==2008,])  
  dataComplete$pred[dataComplete$year == 2008] = predict(m08)
  m09 = lm(conf_dum ~ pre2000_count + bdist1+capdist+pop2005+mnt+irri+gcppc00,data=data_forPred[data_forPred$year==2009,])
  dataComplete$pred[dataComplete$year == 2009] = predict(m09)
  dataComplete$pred[dataComplete$pred < 0] = 0 
  dataComplete$pred[dataComplete$pred > 1] = 1
  dataComplete$conf_dum_new = dataComplete$conf_dum
  sampled = sample(which(dataComplete$conf_dum ==0 & dataComplete$cell==0),size = share_added[percent],replace=FALSE, prob = dataComplete$pred[dataComplete$conf_dum ==0 & dataComplete$cell==0])
  dataComplete$conf_dum_new[sampled] = 1
  pdat = pdata.frame(dataComplete, c("gid","year"))
  m1 = plm(conf_dum_new ~ cell,pdat, model="within",effect="twoways", index=c('gid', 'year'))
  ses = coeftest(m1, vcov=function(x) vcovHC(m1, cluster="group"))
  res = c(iteration,share_added[percent], m1$coef, ses[2])
  names(res) = c("iteration", "added","coef","se")
  rm(data_forPred)
  rm(dataComplete)
  rm(data)
  rm(pdat)
  rm(m07,m08,m09,m1)
  return(res)
}
iterations = rep(seq(1,1000,1),7)


added = c(rep(1,1000),rep(2,1000),rep(3,1000),rep(4,1000),rep(5,1000),rep(6,1000),rep(7,1000))

sim_dat = data.frame(iterations,added)


cl = makeCluster(4)
registerDoParallel(cl)

simu_out_Panel_predicted = foreach(i = 1:7000,
                           .export = c("fake_data_analysis","sim_dat","share_added","dat"),
                           .packages = c("plm","lmtest"),
                           .combine = rbind
) %dopar% {
  library(plm)
  library(lmtest)
  iteration = sim_dat[i,1]
  percent = sim_dat[i,2]
  result = fake_data_analysis(percent, iteration)
  return(result)
}

save(simu_out_Panel_predicted, file="simu_out_Panel_predicted.rda",compress= "xz")

###### Generate results for Figure 9


load("dataForReplication_cell.rda")


data = new[,c("cow","conf2008_dum","cell_dum_07","pre2000_count","bdist1","capdist","pop2005","mnt","irri","gcppc00")]

seeds = seq(0,7000,length.out= 7000)


share_added = rep(NA,7)
# 5% is 10
share_added[1] = round(182/0.95 - 182,0)
# 10% is 20
share_added[2] = round(182/0.90 - 182,0)
# 15% is 32
share_added[3] = round(182/0.85 - 182,0)
# 20% is 46
share_added[4] = round(182/0.8 - 182,0)
# 25% is 61
share_added[5] = round(182/0.75 - 182,0)
# 30% is 78
share_added[6] = round(182/0.7 - 182,0)
# 35% is 
share_added[7] = round(182/0.65 - 182,0)






fake_data_analysis = function(percent,iteration){
  set.seed(seeds[iteration])
  data_new = data[complete.cases(data),]
  data_new$conf_dum_new = data_new$conf2008_dum
  m1 = lm(conf2008_dum ~ pre2000_count + bdist1+capdist+pop2005+mnt+irri+gcppc00+factor(cow),data=data_new)
  pred = predict(m1)
  pred0 = pred[data_new$conf2008_dum == 0 & data_new$cell_dum_07==0]
  pred0[pred0<0] = 0
  pred0[pred0>1] = 1
  changeConf = sample(length(data_new$conf_dum_new[data_new$conf2008_dum == 0 & data_new$cell_dum_07==0]),size = share_added[percent], replace = FALSE,prob = pred0)
  
  data_new$conf_dum_new[data_new$conf2008_dum == 0 & data_new$cell_dum_07==0][changeConf] = 1
  m6 = lm(conf_dum_new ~ pre2000_count + bdist1+capdist+pop2005+mnt+irri+gcppc00+cell_dum_07+factor(cow),data=data_new)
  res = c(iteration,share_added[percent],summary(m6)$coef[9,1], summary(m6)$coef[9,2])
  names(res) = c("iteration", "added","coef","se")
  rm(data_new)
  rm(m6)
  return(res)
}
iterations = rep(seq(1,1000,1),7)


added = c(rep(1,1000),rep(2,1000),rep(3,1000),rep(4,1000),rep(5,1000),rep(6,1000),rep(7,1000))

sim_dat = data.frame(iterations,added)


cl = makeCluster(4)
registerDoParallel(cl)


simu_out_CS_predicted = foreach(i = 1:7000,
                         .export = c("fake_data_analysis","sim_dat","share_added","data"),
                         .packages = c("sandwich"),
                         .combine = rbind
) %dopar% {
  iteration = sim_dat[i,1]
  percent = sim_dat[i,2]
  result = fake_data_analysis(percent, iteration)
  return(result)
}


save(simu_out_CS_predicted , file="simu_out_CS_predicted.rda",compress= "xz")


#### Generate results for Figure 10


data_full = read_dta("panel.dta")
names(data_full)

data_full$year = NA 

data_full$year[data_full$dum08==1] = 2008
data_full$year[data_full$dum09==1] = 2009
data_full$year[is.na(data_full$year)] = 2007


data = data_full[,c("gid","year","conf_dum","cell")]
rm(data_full)

seeds = seq(0,7000,length.out= 7000)

# 529 in cell without coverage 

share_added = rep(NA,6)
# 5% is 28
share_added[1] = round(529/0.95 - 529,0)
# 10% is 59
share_added[2] = round(529/0.90 - 529,0)
# 15% is 94
share_added[3] = round(529/0.85 - 529,0)
# 20% is 133
share_added[4] = round(529/0.8 - 529,0)
# 25% is 177
share_added[5] = round(529/0.75 - 529,0)
# 30% is 227
share_added[6] = round(529/0.7 - 529,0)
# 35% is 285
share_added[7] = round(529/0.65 - 529,0)



fake_data_analysis = function(percent,iteration){
  set.seed(seeds[iteration])
  data$conf_dum_new = data$conf_dum
  sampled = sample(which(data$conf_dum ==0 & data$cell==0),share_added[percent],replace=FALSE)
  data$conf_dum_new[sampled] = 1
  pdat = pdata.frame(data, c("gid","year"))
  m1 = plm(conf_dum_new ~ cell,pdat, model="within",effect="twoways", index=c('gid', 'year'))
  ses = coeftest(m1, vcov=function(x) vcovHC(m1, cluster="group"))
  res = c(iteration,share_added[percent], m1$coef, ses[2],ses[4])
  rm(m1)
  rm(pdat)
  return(res)
}
iterations = rep(seq(1,1000,1),7)


added = c(rep(1,1000),rep(2,1000),rep(3,1000),rep(4,1000),rep(5,1000),rep(6,1000),rep(7,1000))




cl = makeCluster(4)
registerDoParallel(cl)



simu_out_Panel_completeRandom = foreach(i = 1:7000,
                   .export = c("fake_data_analysis","sim_dat","share_added","data"),
                   .packages = c("plm","lmtest"),
                   .combine = rbind
) %dopar% {
  iteration = sim_dat[i,1]
  percent = sim_dat[i,2]
  result = fake_data_analysis(percent, iteration)
  return(result)
}

simu_out_Panel_completeRandom = data.frame(simu_out_Panel_completeRandom)

save(simu_out_Panel_completeRandom, file="simu_out_Panel_completeRandom.rda",compress= "xz")






